#clean up
rm(list=ls())

#load libraries
library(akima)
library(geoR)
library(scatterplot3d)  
library(ggmap)
library(lattice)
library(foreign)
library(maps)
library(spdep)
library(rgdal)
library(prevR)
library(car)
library(xtable)
library(rgdal)

#options
options(scipen=8)

#Set to working directory. Adjust the command below to reflect YOUR working directory.
#setwd('/Volumes/MONOGAN/gillAreal/ccesCode/')
#setwd('/Users/jamie/Documents/gillAreal/ccesCode/')
#setwd('/Users/jamie/Desktop/nhgis0003_shape/nhgis0003_shapefile_us_tract_1970/')

#Load ZIP code centroids
#source: http://www.census.gov/geo/maps-data/data/gazetteer2000.html
zip.0<-read.fwf('zcta5.txt', widths=c(2,5,59,9,9,14,14,12,12,10,10),col.names=c('state','zip','zcta','population','housingUnits','landMeters','waterMeters','landMiles','waterMiles','latitude','longitude'))
zip.0$zipLandKM<-zip.0$landMeters/(1000^2)
zip.0$zipPop<-zip.0$population
zip<-subset(zip.0, select=c(state,zip,latitude,longitude,zipPop,zipLandKM))

#USDA urban-rural continuum codes by county
#http://www.ers.usda.gov/data-products/rural-urban-continuum-codes.aspx#.UfCK11OE4xc
usda.0<-read.csv('ruralurbancodes2003.csv')
#subtract 1 to have a 0 origin
usda.0$rural<-usda.0$X2003.Rural.urban.Continuum.Code-1
usda<-subset(usda.0, select=c(FIPS.Code,State,County.Name,rural))

#load CCES data
cces.0<-read.dta('cces08_common_output.dta',convert.factors=FALSE)
cces.1<-cces.0[!is.na(cces.0$v253),]
cces.1$FIPS.Code<-(1000*as.numeric(cces.1$v265))+cces.1$v270
cces.2<-subset(cces.1,select=c(v100,v200,v202,v203,v204,v206,v253,v256,v257,v258,v259,v263,v264,v265,v268,v270,v207,v208,v211,v212,v213,cc307,cc307a,cc310,cc311,cc312,cc313,cc317a,cc423,cc424,v209,cc333,v246,v219,v220,v221,v222,v223,v225,v226,v232,FIPS.Code))
cces.2$zip<-sprintf("%05d",cces.2$v253)

#merge cces with USDA urban/rural data
cces<-merge(y=cces.2,x=usda,by='FIPS.Code',all.y=T,all.x=F)

#merge ZIP centroids with post-study zip code (v253)
combined.0<-merge(x=zip, y=cces, by.x='zip', by.y='zip')

#drop observations where ZIP state and listed state are different
combined.0$state.2<-recode(combined.0$v259,'1="AL";2="AK";4="AZ";5="AR";6="CA";8="CO";9="CT";10="DE";11="DC";12="FL";13="GA";15="HI";16="ID";17="IL";18="IN";19="IA";20="KS";21="KY";22="LA";23="ME";24="MD";25="MA";26="MI";27="MN";28="MS";29="MO";30="MT";31="NE";32="NV";33="NH";34="NJ";35="NM";36="NY";37="NC";38="ND";39="OH";40="OK";41="OR";42="PA";44="RI";45="SC";46="SD";47="TN";48="TX";49="UT";50="VT";51="VA";53="WA";54="WV";55="WI";56="WY"')

#clean ideology, age, and sex
combined.0$ideology<-combined.0$cc317a
combined.0$age<-2008-combined.0$v207
combined.0$female<-as.numeric(combined.0$v208==2)

#education: 6 point ordinal scale, subtract 1 so "no hs" is 0
combined.0$v213[combined.0$v213>6]<-NA
combined.0$educ<-combined.0$v213-1

#race, 1=white, 2=black, recode so 3="other"
combined.0$v211[combined.0$v211>8]<-NA
combined.0$v211[combined.0$v211>2]<-3
combined.0$race<-combined.0$v211

#employment status
combined.0$empstat<-recode(combined.0$v209,'1:2=1;3:4=2;5:8=3;9=NA')

#home ownership
combined.0$ownership<-recode(combined.0$cc333,'1=0;2=1;3:4=0;8=NA')

#family income
combined.0$inc14<-combined.0$v246
combined.0$inc14[combined.0$inc14>14]<-NA

#religion variables
#all not falling into one of these categories are treated as the reference group 
#e.g., those not identifying with a common U.S. religion are the reference
combined.0$catholic<-as.numeric(combined.0$v219==2)
combined.0$mormon<-as.numeric(combined.0$v219==3)
combined.0$orthodox<-as.numeric(combined.0$v219==4)
combined.0$jewish<-as.numeric(combined.0$v219==5)
combined.0$islam<-as.numeric(combined.0$v219==6)
combined.0$mainline<-as.numeric( (combined.0$v219==1 & combined.0$v220%in%c(7,8,9)) | (combined.0$v219==1 & combined.0$v220==11 & combined.0$v232%in%c(1,90)) | (combined.0$v219==1 & combined.0$v220==1 & combined.0$v222%in%c(2,3,4)) | (combined.0$v219==1 & combined.0$v220==2 & combined.0$v223%in%c(1,3,5,90)) | (combined.0$v219==1 & combined.0$v220==4 & combined.0$v225%in%c(1,4)) | (combined.0$v219==1 & combined.0$v220==5 & combined.0$v226%in%c(1,90)) )
combined.0$evangelical<-as.numeric( (combined.0$v219==1 & combined.0$v220%in%c(3,6,10,12))   | (combined.0$v219==1 & combined.0$v220==11 & combined.0$v232==2) | (combined.0$v219==1 & combined.0$v220==1 & combined.0$v222%in%c(1,5,6,7,8,9,10,90)) | (combined.0$v219==1 & combined.0$v220==2 & combined.0$v223%in%c(2,4)) | (combined.0$v219==1 & combined.0$v220==4 & combined.0$v225%in%c(2,3)) | (combined.0$v219==1 & combined.0$v220==5 & combined.0$v226%in%c(2,3,4,5,6)) )

#congressional district
combined.0$cd<-as.numeric(combined.0$v264)
combined.0$fipsCD<-100*combined.0$v259+combined.0$cd

#weight
combined.0$weight<-combined.0$v200

#subset to relevant variables and states; drop cases where ZIP state and listed state do not link up
combined.1<-subset(combined.0, select=c(state,zip,latitude,longitude,ideology,age,female,educ,race,empstat,ownership,inc14,catholic,mormon,orthodox,jewish,islam,mainline,evangelical,FIPS.Code,rural,zipPop,zipLandKM,weight,cd,fipsCD),subset=state!="AK" & state!="HI" & state==state.2)

combined.2<-na.omit(combined.1)
dim(combined.2)
table(combined.2$state)[-c(1,12,40)]
min(table(combined.2$state)[-c(1,12,40)])

#re-project data
coordinates(combined.2)<- c('longitude','latitude')
proj4string(combined.2)<-CRS("+proj=longlat +datum=NAD83")

#source: http://spatialreference.org/ref/esri/102003/proj4/
combined.3<-spTransform(combined.2,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))

#better names
colnames(combined.3@coords)<-c('eastings','northings')

#visualization of observations in space
#postscript('respLocations.eps',width=5,height=5)
#pdf('respLocations.pdf')
plot(combined.3, pch='.')
axis(1);axis(2)
box()
#dev.off()

#jitter, listwise delete missing (1311--all ideology), and create new dataset
combined.4<-combined.3@data
radii<-sqrt(combined.4$zipLandKM/pi)#radii to jitter by
radii[radii==0]<-.1
combined.4$eastings<-combined.4$northings<-NA
for(i in 1:length(combined.3@coords[,1])){
	combined.4$eastings[i]<-jitter(combined.3@coords[i,1],amount=radii[i])
	combined.4$northings[i]<-jitter(combined.3@coords[i,2],amount=radii[i])
	}
dim(combined.4)
summary(combined.4)

plot(combined.3, pch='.')
points(x=combined.4$eastings, y=combined.4$northings, pch='.',col='red')
axis(1);axis(2)
box()

#drop the data into a Stata file
write.dta(combined.4, 'cces08reformatted.dta')
